import pandas as pd
import numpy as np
import pickle

# Load the full match table from disk.
atp_tennis = pd.read_csv('atp_tennis.csv')

# Keep only the rows whose 'Round' is a quarterfinal, semifinal, or final.
is_later_round = atp_tennis['Round'].isin(['Quarterfinals', 'Semifinals', 'Finals'])
later_rounds = atp_tennis[is_later_round]

# Attach the updated odds to the filtered matches. The row index of
# `later_rounds` is matched against the 'Match ID' column of the updated-odds
# frame (default inner join, so matches without updated odds are dropped).
# NOTE(review): `updated_odds_df` is not defined in the visible part of this
# file; it must already exist when this line runs.
updated_odds_subset = updated_odds_df[['Match ID', 'Updated Odd_1', 'Updated Odd_2']]
later_rounds_odds = pd.merge(
    later_rounds,
    updated_odds_subset,
    left_index=True,
    right_on='Match ID',
)

# Calculate the original and updated match favorites.
# The favorite is the player with the strictly lower odd. On a tie, or when an
# odd is missing (the comparison evaluates to False), Player_2 is chosen.
# np.where evaluates whole columns at once instead of calling a Python lambda
# per row, and it also works when the frame has no rows.
later_rounds_odds['Original Favorite'] = np.where(
    later_rounds_odds['Odd_1'] < later_rounds_odds['Odd_2'],
    later_rounds_odds['Player_1'],
    later_rounds_odds['Player_2'],
)
later_rounds_odds['Updated Favorite'] = np.where(
    later_rounds_odds['Updated Odd_1'] < later_rounds_odds['Updated Odd_2'],
    later_rounds_odds['Player_1'],
    later_rounds_odds['Player_2'],
)

# Flag each match whose favorite differs between the original and updated odds.
favorite_changed = later_rounds_odds['Original Favorite'] != later_rounds_odds['Updated Favorite']
later_rounds_odds['Change in Favorite'] = favorite_changed

# Total number of flipped favorites; the single count is broadcast so every
# row of the column carries the same value.
later_rounds_odds['Shift in Tournament Dynamics'] = favorite_changed.sum()

# Create a dataframe with the required columns
impact_df = later_rounds_odds[['Match ID', 'Round', 'Odd_1', 'Odd_2', 'Updated Odd_1', 'Updated Odd_2', 'Original Favorite', 'Updated Favorite', 'Change in Favorite', 'Shift in Tournament Dynamics']]

print(impact_df)

# Persist the result; the context manager guarantees the file is flushed and
# closed even if pickling raises.
with open("./ref_result/impact_df.pkl", "wb") as result_file:
    pickle.dump(impact_df, result_file)